REGISTER tw.jar;
-- Load the raw logs under /hnbc/ with each record exposed as a single
-- chararray field named `line`. PigStorage splits on \u0001 and only the
-- first resulting field is named here.
-- NOTE(review): presumably the input never contains \u0001, so the whole
-- text line ends up in `line` -- confirm against the data.
raw_lines = LOAD '/hnbc/' USING PigStorage('\u0001') AS (line:chararray);

-- Run each line through the LogFormat UDF (from tw.jar) and flatten the
-- tuple it returns into 17 named chararray columns.
-- NOTE(review): `datetime` is also a Pig type name in newer releases and may
-- be treated as a reserved word there -- verify on the target Pig version.
log1 = FOREACH raw_lines GENERATE
    FLATTEN(com.tw.udf.LogFormat(line)) AS (
        datetime:chararray,
        requesttime:chararray,
        domain:chararray,
        requesturl:chararray,
        srcip:chararray,
        srcprovince:chararray,
        srcisp:chararray,
        srciprange:chararray,
        destip:chararray,
        destprovince:chararray,
        destisp:chararray,
        destiprange:chararray,
        type:chararray,
        os:chararray,
        browser:chararray,
        referer:chararray,
        phone:chararray
    );
-- Traffic per `datetime` value:
--   pv  = number of parsed records in the group
--   uvs = number of distinct srcip values in the group
-- Pig's COUNT skips tuples whose first field is null (COUNT_STAR would
-- include them), so pv depends on the first column of log1 being non-null.
records_by_datetime = GROUP log1 BY datetime;
datetime_traffic = FOREACH records_by_datetime {
    unique_ips = DISTINCT log1.srcip;
    GENERATE
        group AS datetime,
        COUNT(log1) AS pv,
        COUNT(unique_ips) AS uvs;
};
-- Written with PigStorage's default field delimiter.
STORE datetime_traffic INTO '/tw/warehouse/visitorcount0/' USING PigStorage();

-- Distinct source IPs (rangeuvs) for every (datetime, srciprange) pair.
records_by_datetime_range = GROUP log1 BY (datetime, srciprange);
range_visitors = FOREACH records_by_datetime_range {
    unique_ips = DISTINCT log1.srcip;
    GENERATE
        group.datetime AS datetime,
        group.srciprange AS srciprange,
        COUNT(unique_ips) AS rangeuvs;
};
-- Written with PigStorage's default field delimiter.
STORE range_visitors INTO '/tw/warehouse/visitorcount1/' USING PigStorage();

-- Number of records (typenum) for every (datetime, type) pair.
-- COUNT ignores tuples whose first field is null.
records_by_datetime_type = GROUP log1 BY (datetime, type);
type_counts = FOREACH records_by_datetime_type GENERATE
    group.datetime AS datetime,
    group.type AS type,
    COUNT(log1) AS typenum;
-- Written with PigStorage's default field delimiter.
STORE type_counts INTO '/tw/warehouse/type/' USING PigStorage();

-- Page views per request URL. The alias log9 is kept as-is because the
-- URL-range report further down reads from it.
records_by_url = GROUP log1 BY requesturl;
log9 = FOREACH records_by_url GENERATE
    group AS requesturl,
    COUNT(log1) AS urlpv;

-- Hot URLs: sort by page views, highest first, and keep the first 1000 rows.
urls_by_pv = ORDER log9 BY urlpv DESC;
hot_urls = LIMIT urls_by_pv 1000;
-- Written with PigStorage's default field delimiter.
STORE hot_urls INTO '/tw/warehouse/hoturl/' USING PigStorage();

-- Label each URL's page-view count via the UrlRangeUdf UDF (from tw.jar),
-- then count how many URLs received each label.
-- NOTE(review): presumably the label is a page-view bucket -- confirm
-- against the UDF implementation.
url_labels = FOREACH log9 GENERATE
    requesturl,
    com.tw.udf.UrlRangeUdf(urlpv) AS urlrange:chararray;
urls_by_label = GROUP url_labels BY urlrange;
label_counts = FOREACH urls_by_label GENERATE
    group AS urlrange,
    COUNT(url_labels) AS urlrangecount;
-- Written with PigStorage's default field delimiter.
STORE label_counts INTO '/tw/warehouse/urlrange/' USING PigStorage();
										